#delimit;
clear all;
set more off;
graph set window fontface "Times New Roman";
graph set window fontfacesans "Times New Roman";
graph set window fontfaceserif "Times New Roman";
graph set window fontfacemono "Times New Roman";
graph set window fontfacesymbol "Times New Roman";
local root="/Users/lwdavis/Dropbox/Clean Energy Tax Credits/stata/irs";
cd "`root'";


*----------------------------------------------------*;
*------------Bring in Table 3.3 Data-----------------*;
*----------------------------------------------------*;
*Downloaded from IRS SOI in November 2023;
*https://www.irs.gov/statistics/soi-tax-stats-individual-statistical-tables-by-size-of-adjusted-gross-income
*https://www.irs.gov/statistics/soi-tax-stats-individual-income-tax-returns-complete-report-publication-1304-basic-tables-part-3
*I save XLS files as is for both the number of returns and total amounts (Table 3.3)
*As well as the coefficients of variation that are used to construction confidence intervals;
*The exact position of the columns changes across years so I do by hand for each year;
*The exact rows also moves around slightly across years;
*I check by hand to see that the sum matches the sum in the raw file (obviously doesn't work for CV)
*The relevant EV credit is the Qualified Plug-In Electric Vehicle Credit;
*The other one -- Qualified Electric Vehicle Credit -- is much smaller and only golf carts;
*IRS SOI is available starting in 1996;
*But none of these credits are available before 2006 so I start there;

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/06in33`x'.xls", clear; 
keep A B O P;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
list totalreturns re* if _n==9;
keep in 10/28; destring, replace;
preserve; collapse (sum) totalreturns`x' re*; list; restore;
gen year=2006; save "`root'/intermediate/2006`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/07in33`x'.xls", clear; 
keep A B O P AI AJ;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename AI amv_returns`x';
rename AJ amv_amount`x';
list totalreturns re* amv* if _n==9;
keep in 10/28; destring, replace;
preserve; collapse (sum) totalreturns`x' re* amv*; list; restore;
gen year=2007; save "`root'/intermediate/2007`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/08in33`x'.xls", clear; 
keep A B O P AA AB;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename AA amv_returns`x';
rename AB amv_amount`x';
list totalreturns re* amv* if _n==9;
keep in 10/28; destring, replace;
preserve; collapse (sum) totalreturns re* amv*; list; restore;
gen year=2008; save "`root'/intermediate/2008`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/09in33`x'.xls", clear; 
keep A B Q R Y Z AC AD AE AF;
rename A income;
rename B totalreturns`x';
rename Q re_returns`x';
rename R re_amount`x';
rename Y amv_returns`x';
rename Z amv_amount`x';
rename AC afv_returns`x';
rename AD afv_amount`x';
rename AE ev_returns`x';
rename AF ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==9;
keep in 10/28; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=2009; save "`root'/intermediate/2009`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/10in33`x'.xls", clear; 
keep A B O P W X AA AB AC AD;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename W amv_returns`x';
rename X amv_amount`x';
rename AA afv_returns`x';
rename AB afv_amount`x';
rename AC ev_returns`x';
rename AD ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==9;
keep in 10/29; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=2010; save "`root'/intermediate/2010`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/11in33`x'.xls", clear; 
keep A B O P W X AA AB AC AD;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename W amv_returns`x';
rename X amv_amount`x';
rename AA afv_returns`x';
rename AB afv_amount`x';
rename AC ev_returns`x';
rename AD ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==10;
keep in 11/30; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=2011; save "`root'/intermediate/2011`x'.dta", replace; };

foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/12in33`x'.xls", clear; 
keep A B O P Y Z AC AD AE AF;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename Y amv_returns`x';
rename Z amv_amount`x';
rename AC afv_returns`x';
rename AD afv_amount`x';
rename AE ev_returns`x';
rename AF ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==10;
keep in 11/30; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=2012; save "`root'/intermediate/2012`x'.dta", replace; };

foreach y of numlist 13/14 {;
foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/`y'in33`x'.xls", clear; 
keep A B O P Y Z AC AD AE AF;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename Y amv_returns`x';
rename Z amv_amount`x';
rename AC afv_returns`x';
rename AD afv_amount`x';
rename AE ev_returns`x';
rename AF ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==10;
keep in 11/29; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=20`y'; save "`root'/intermediate/20`y'`x'.dta", replace; }; };

foreach y of numlist 15/20 {;
foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/`y'in33`x'.xls", clear; 
keep A B O P Y Z AA AB AC AD;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename Y amv_returns`x';
rename Z amv_amount`x';
rename AA afv_returns`x';
rename AB afv_amount`x';
rename AC ev_returns`x';
rename AD ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==10;
keep in 11/29; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=20`y'; save "`root'/intermediate/20`y'`x'.dta", replace; }; };

foreach y of numlist 21 {;
foreach x in "ar" "cv" {;
import excel using "`root'/raw data/IRS SOI Table 3.3/`y'in33`x'.xls", clear; 
keep A B O P AA AB AC AD AE AF;
rename A income;
rename B totalreturns`x';
rename O re_returns`x';
rename P re_amount`x';
rename AA amv_returns`x';
rename AB amv_amount`x';
rename AC afv_returns`x';
rename AD afv_amount`x';
rename AE ev_returns`x';
rename AF ev_amount`x';
list totalreturns re* amv* afv* ev* if _n==10;
keep in 11/29; destring, replace;
preserve; collapse (sum) totalreturns re* amv* afv* ev*; list; restore;
gen year=20`y'; save "`root'/intermediate/20`y'`x'.dta", replace; }; };


*----------------------------------------------------*;
*------------Append and Clean Dataset----------------*;
*----------------------------------------------------*;
*Append Years, Average Returns (AR);
use "`root'/intermediate/2006ar.dta";
foreach y of numlist 2007/2021 {; append using "`root'/intermediate/`y'ar.dta"; };
save ar.dta, replace;

*Append Years, Coefficient of Variation (CV);
use "`root'/intermediate/2006cv.dta";
foreach y of numlist 2007/2021 {; append using "`root'/intermediate/`y'cv.dta"; };
save cv.dta, replace;

*Merge AR with CV;
merge 1:1 year income using ar.dta;
assert _merge==3; drop _merge totalreturnscv;
erase ar.dta; erase cv.dta;

*Convert all dollar amounts in dollars, rather than 1000s;
foreach x in ev afv re amv {;
replace `x'_amountar=`x'_amountar*1000; };

*Create Numerical Income Measure and Sort;
*Remarkably the income categories are almost the same across all years;
*One exception is that $200-$500 was divided into $200-$250 and $250-$500 in three of 15 years;
*All analyses combine those categories;
*Another issue is that in some cases, IRS SOI combines data from multiple rows to avoid potential disclosure of information;
*The documentation and tables themseleves are vague about which rows are combined;
*Combining categories helps with this problem, and is the best we can do without microdata;
gen y=0 if income=="No adjusted gross income" | income=="$1 under $5,000";
replace y=5     if income=="$5,000 under $10,000";
replace y=10    if income=="$10,000 under $15,000";
replace y=15    if income=="$15,000 under $20,000";
replace y=20    if income=="$20,000 under $25,000";
replace y=25    if income=="$25,000 under $30,000";
replace y=30    if income=="$30,000 under $40,000";
replace y=40    if income=="$40,000 under $50,000";
replace y=50    if income=="$50,000 under $75,000";
replace y=75    if income=="$75,000 under $100,000";
replace y=100   if income=="$100,000 under $200,000";
replace y=200   if income=="$200,000 under $500,000" | income=="$200,000 under $250,000" | income=="$250,000 under $500,000";
replace y=500   if income=="$500,000 under $1,000,000";
replace y=1000  if income=="$1,000,000 under $1,500,000";
replace y=1500  if income=="$1,500,000 under $2,000,000";
replace y=2000  if income=="$2,000,000 under $5,000,000";
replace y=5000  if income=="$5,000,000 under $10,000,000";
replace y=10000 if income=="$10,000,000 or more"; sort y; 

*APPROXIMATE QUINTILES;
*In 2020, there were 164 million total tax returns filed;
*I confirmed this by calculating the sum of TOTAL RETURNS over all income categories;
*You can also see this at the beginning of Table 3.3;
*Thus each quintile should have about 33 million returns;
gen quintile=0;
replace quintile=1 if y<15;
replace quintile=2 if y>=15  & y<30;
replace quintile=3 if y>=30  & y<50;
replace quintile=4 if y>=50  & y<100;
replace quintile=5 if y>=100 & y<200;
replace quintile=6 if y>=200;
save cleaned.dta, replace;

*Confirm that these are approximately quintiles;
preserve; keep if year==2021; collapse (sum) total, by(quintile);
sum total; gen percent_in_bin=total/`r(sum)'; list; 
drop percent_in_bin; restore;

*The IRS reports standard errors for all summary statistics,;
*expressed as a percentage of the statistic being estimated.;
*Thus: SE = CV * Xbar;
*Also, because SE = sigma / sqrt(n);
*We get sigma = sqrt(n) * CV * Xbar;
*And thus VAR = n * (CV * Xbar)^2;
*To combine across bins, we just sum these calculated variances, and use the formula;
*SEnew = sigma_new / sqrt(n_new) = sqrt(n1(CV1*Xbar1)^2 + n2(CV2*Xbar2)^2) / sqrt(n1 + n2);

*Descriptive Statistics Table, All Values in Millions;
*The first column includes both NEPC and REEPC;
*The table in the paper uses the line item estimates to separate these two;
*The table in the paper also omits the AMV and AFV, both are smaller credits;
collapse (sum) re_amountar amv_amountar afv_amountar ev_amountar, by(year);
foreach x of varlist re_amountar amv_amountar afv_amountar ev_amountar {;
replace `x'=round(`x'/1000000,1); };  
list; collapse (sum) re* amv* afv* ev*; list;


*-----------------------------------------*;
*---------Average Credit Amount-----------*;
*-----------------------------------------*;
foreach z in credit percent avg {;
foreach x in re ev {;
use cleaned.dta, clear;
keep `x'* total quintile; 

*Define statistics and standard errors;
generate z="`z'"; generate x="`x'"; 
if (z=="credit") {; local ytitle "Average Credit Per Return, in Dollars"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_amountcv/100)*`x'_amountar; 
	local stat="`x'_amountar/total"; local se="se/total";  };  
if (z=="percent") {; local ytitle "Percent Claiming Credit"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_returnscv/100)*`x'_returnsar;
	local stat="`x'_returnsar/total"; local se="se/total";  };  
if (z=="avg") {; local ytitle "Average Credit Per Claimant, in Dollars"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_amountcv/100)*`x'_amountar;
	local stat="`x'_amountar/`x'_returnsar"; local se="se/`x'_returnsar";  };  

*Prepare figure label;
if (x=="ev") {; local title "Electric Vehicle Credits";  };  
if (x=="re") {; local title "Residential Energy Credits";  };  

*Calculate standard error and confidence interval for combined category;
*In previous iteration we plotted 95% confidence intervals but they are so narrow they are hard to see;
gen var=sigma^2; collapse (sum) total `x'_returnsar `x'_amountar var, by(quintile);
gen se=sqrt(var)/sqrt(`x'_returnsar); 
gen stat=`stat'; replace se=`se';
gen lo=stat-2*se; gen hi=stat+2*se;

*List average credit amounts for mentioning in text;
generate z="`z'"; if (z=="credit") {; list quintile stat; };

*Plot Credit Dollars Per Return;
scatter stat quintile, xsize(5) ysize(3)
	msize(vlarge) msymbol(o) mcolor(black)
	xtitle("Annual Income, Thousands of Dollars", size(medium)) 
	ytitle("`ytitle'", size(medium)) 
	title("`title'", color(black) size(large)) 
	xscale(range(0.75 6.25))
	xlabel(1 "<15" 2 "15-30" 3 "30-50" 4 "50-100" 5 "100-200" 6 ">200", nogrid labsize(medium)) 
	ylabel(, angle(0) nogrid labsize(medium))
	plotregion(style(none)) graphregion(fcolor(white) lcolor(white)) legend(off);
graph export "`root'/figs/`z'_`x'.pdf", as(pdf) replace; }; };

*------------------------------------------------------*;
*------------Average Credit Amount By Year-------------*;
*------------------------------------------------------*;
*This is done only for the two largest credit categories;
*I loop over all years for all credits, but then subsequently ignore years where credits weren't available;
*There are some years and income categories for EVs with zero observations, so no statistic;
*These are valuable for ourselves, but we don't include in paper or appendix;
*foreach z in credit percent avg {;
foreach z in credit {;
foreach x in re ev  {;
foreach t in 2009 2013 2017 2021 {;
use cleaned.dta, clear;
keep if year==`t';
keep `x'* total quintile; 

*Define statistics and standard errors;
generate z="`z'"; 
if (z=="credit") {; local ytitle "Average Credit Per Return, in Dollars"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_amountcv/100)*`x'_amountar; 
	local stat="`x'_amountar/total"; local se="se/total";  };  
if (z=="percent") {; local ytitle "Percent Claiming Credit"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_returnscv/100)*`x'_returnsar;
	local stat="`x'_returnsar/total"; local se="se/total";  };  
if (z=="avg") {; local ytitle "Average Credit Per Claimant, in Dollars"; 
	gen sigma=sqrt(`x'_returnsar)*(`x'_amountcv/100)*`x'_amountar;
	local stat="`x'_amountar/`x'_returnsar"; local se="se/`x'_returnsar";  };  

*Calculate standard error and confidence interval for combined category;
*In previous iteration we plotted 95% confidence intervals but they are so narrow they are hard to see;
gen var=sigma^2; collapse (sum) total `x'_returnsar `x'_amountar var, by(quintile);
quietly sum `x'_amountar; local totalcredit=round(`r(mean)'*`r(N)'/1000000,1);
gen se=sqrt(var)/sqrt(`x'_returnsar);
gen stat=`stat'; replace se=`se';
gen lo=stat-2*se; gen hi=stat+2*se;

*Plot Credit Dollars Per Return;
scatter stat quintile, xsize(5) ysize(3)
	   msize(vlarge) msymbol(o) mcolor(black)
	   xtitle("Annual Income, Thousands of Dollars", size(medium)) 
	   ytitle("`ytitle'", size(medium)) 
	   title("`t'", color(black) size(large))
	   subtitle("$`totalcredit' million", color(black) size(medium))
	   xscale(range(0.75 6.25))
	   xlabel(1 "<15" 2 "15-30" 3 "30-50" 4 "50-100" 5 "100-200" 6 ">200", labsize(medium) nogrid) 
	   ylabel(, angle(0) nogrid labsize(medium))
	   plotregion(style(none)) graphregion(fcolor(white) lcolor(white)) legend(off); 
graph export "`root'/bonusfigs/`z'Yr_`x'_`t'.pdf", as(pdf) replace; }; }; };


*---------------------------------------*;
*---------Concentration Curves----------*;
*---------------------------------------*;
*Calculate cumulative fractions;
*The variable y is defined as the beginning of each income interval;
*For example, y=100 is between $100,000 and $200,000;
*The function `sum' in STATA gives you the sum through that observation;
*So cdf(100) gives you the sum of credits up to and including $100K-$200K;
foreach x in re ev {;
use cleaned.dta, clear;
collapse (sum) totalreturns `x'_amountar, by(y);
gen totalincome=y*totalreturns;
foreach var of varlist totalincome totalreturns `x'_amountar {;
quietly sum `var'; local sum=`r(sum)'; sort y; 
gen cdf_`var'=sum(`var')/`sum'; };

*Prepare figure label; 
generate x="`x'";  if (x=="ev") {; local title "Electric Vehicle Credits";  };  
if (x=="re") {; local title "Residential Energy Credits";  };  

*Create an extra observation that starts all variables at the origin;
set obs `=_N+1'; replace y = -1 in `=_N'; 
foreach var of varlist totalincome totalreturns `x'_amountar {;
replace cdf_`var'=0 in `=_N'; }; sort y;

*Plot concentration curves;
scatter cdf_`x'_amountar cdf_totalincome cdf_totalreturns cdf_totalreturns, 
	   msymbol(i i i) connect(l l l) lcolor(black dkorange gs9) 
	   lpattern(dash shortdash dot) lwidth(thick medium medium)
	   xtitle("Cumulative Fraction of Tax Returns, Ranked by AGI", size(medium)) 
	   ytitle("Cumulative Fraction of Credits and AGI", size(medium)) 
	   title("`title'", color(black))
	   xlabel(, labsize(medium) nogrid) 
	   ylabel(, angle(0) nogrid labsize(medium))
       legend(label(1 "credit") label(2 "AGI") label(3 "45 degree line") 
	   size(medium) cols(1) position(11) ring(0) bmargin(l+2 t+2))
       plotregion(lcolor(black)) graphregion(fcolor(white) lcolor(white));
graph export "`root'/figs/concentration_`x'.pdf", as(pdf) replace; };


*-------------------------------------------------*;
*---------Calculate Concentration Indices---------*;
*-------------------------------------------------*;
*For this part of code I borrowed heavily from Walter;
use cleaned.dta, clear;
local varlist="totalreturns re_amountar ev_amountar";
collapse (sum) `varlist', by(y);
gen totalincome=y*totalreturns;
foreach var of varlist totalincome `varlist' {;
quietly sum `var'; sort y; gen cdf_`var'=sum(`var')/`r(sum)'; };

*Sanity check, calculate the area under 45 degree line, should be 0.5;
gen area_AB=0.5*(cdf_totalreturns+cdf_totalreturns[_n-1])*(cdf_totalreturns-cdf_totalreturns[_n-1]);
quietly sum area_AB; display "area under 45 degree line = " round(`r(sum)',.001); 

*Calculate concentration index for AGI, also known as the Gini coefficient;
gen area_B_AGI=0.5*(cdf_totalincome+cdf_totalincome[_n-1])*(cdf_totalreturns-cdf_totalreturns[_n-1]);
quietly sum area_B_AGI; local area_B_AGI=round(`r(sum)',.001); display "area under AGI line = `area_B_AGI'";
display "gini coefficient = " round((0.5-`area_B_AGI')/0.5,.001);
	
*Finally calculate concentration index for credits;
foreach x in re ev {;
gen area_B_`x'=0.5*(cdf_`x'_amountar+cdf_`x'_amountar[_n-1])*(cdf_totalreturns-cdf_totalreturns[_n-1]);
quietly sum area_B_`x'; local total_area_B = `r(sum)'; display "area under credit line = " round(`r(sum)',.01);
display "concentration coefficient for `x' = " round((0.5-`total_area_B')/0.5,.001); };



*-----------------------------------------------------*;
*-------Examine Concentration Indices Over Time-------*;
*-----------------------------------------------------*;
*Create excel sheet with concentration indices by year;
putexcel set "`root'/concentration", replace;
forvalues t = 2006/2021 {; use cleaned.dta, clear; keep if year==`t';
local varlist="totalreturns re_amountar ev_amountar";
collapse (sum) `varlist', by(y);
foreach var of varlist `varlist' {;
quietly sum `var'; local sum=`r(sum)'; sort y; gen cdf_`var'=sum(`var')/`sum'; };
foreach x in ev re {;
gen area_B_`x'=0.5*(cdf_`x'_amountar+cdf_`x'_amountar[_n-1])*(cdf_totalreturns-cdf_totalreturns[_n-1]);
quietly sum area_B_`x'; local concen_`x'=(0.5-`r(sum)')/0.5; };
putexcel A`t' = `concen_re'; putexcel B`t' = `concen_ev'; }; 

*Import and plot concentration indices by year;
import excel using "`root'/concentration.xlsx", clear cellrange(A2006:B2021); 
gen year=_n+2005; rename A re; rename B ev; 
replace ev=. if year<=2008 | year==2010;
scatter ev re year, 
	   msymbol(i i) connect(l l) lcolor(black dkorange) 
	   lpattern(dash shortdash) lwidth(thick thick)
	   xtitle("", size(medium)) 
	   ytitle("Concentration Index", size(medium)) 
	   title("", color(black))
	   xlabel(2006(4)2022, labsize(medium) nogrid) 
	   ylabel(0(.1)1, angle(0) nogrid labsize(medium))
	   yscale(range(0.00 1.05))
       legend(label(1 "Electric Vehicle Credits") label(2 "Residential Energy Credits") 
	   size(medium) cols(1) position(7) ring(0) bmargin(l+2 t+0.01))
       plotregion(lcolor(black)) graphregion(fcolor(white) lcolor(white));
graph export "`root'/figs/concentration_by_year.pdf", as(pdf) replace; 
erase concentration.xlsx;


*-----------------------------------------------------*;
*------------Plot Tax Expenditures Over Time----------*;
*-----------------------------------------------------*;
*Table 1 in the Paper;
clear; input year NEPC REEPC PEDVC;
2005 0    0     0;
2006 957  43    0;
2007 938  69    0;
2008 0    217   0;
2009 5177 645   129;
2010 5420 754   1;
2011 755  921   76;
2012 449  818   139;
2013 622  992   231;
2014 518  1120  263;
2015 517  1570  252;
2016 513  1823  375;
2017 248  1877  537;
2018 0    2512  1541;
2019 331  3176  643;
2020 406  3469  313;
2021 446  4886  1037;
end; sort year;

scatter NEPC REEPC PEDVC year, 
	   msymbol(i i) connect(l l) lcolor(black dkorange) 
	   lpattern(dash shortdash) lwidth(thick thick)
	   xtitle("", size(medium)) 
	   ytitle("Expenditures", size(medium)) 
	   title("", color(black))
	   xlabel(2006(4)2022, labsize(medium) nogrid) 
	   ylabel(0(.1)1, angle(0) nogrid labsize(medium))
	   yscale(range(0.00 1.05))
       plotregion(lcolor(black)) graphregion(fcolor(white) lcolor(white));

gen solar_ratio = REEPC/(NEPC+REEPC);
scatter solar_ratio year, 
	   msymbol(i) connect(l) lcolor(black) 
	   lpattern(dash) lwidth(thick)
	   xtitle("", size(medium)) 
	   ytitle("", size(medium)) 
	   title("", color(black))
	   xlabel(2006(4)2022, labsize(medium) nogrid) 
	   ylabel(0(.1)1, angle(0) nogrid labsize(medium))
	   yscale(range(0.00 1.05))
       plotregion(lcolor(black)) graphregion(fcolor(white) lcolor(white));
graph export "`root'/figs/expenditureratio_by_year.pdf", as(pdf) replace; 
